In [1]:
import re
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
import plotly.express as px
import seaborn as sns 
from matplotlib import style
import missingno as msno
from IPython.core.interactiveshell import InteractiveShell
In [2]:
# reading data
df=pd.read_csv('Downloads/Project 5/sales_data.csv')
d=df.copy(deep=True)
d
Out[2]:
Order Date Order ID Product Product_ean Category Purchase Address Quantity Ordered Price Each Cost price turnover margin
0 1/22/2019 21:25 141234 iPhone 5.640000e+12 Phones 944 Walnut St, Boston, MA 02215 1 700.00 231.0000 700.00 469.0000
1 1/28/2019 14:15 141235 Lightning Charging Cable 5.560000e+12 Accessories 185 Maple St, Portland, OR 97035 1 14.95 7.4750 14.95 7.4750
2 1/17/2019 13:33 141236 Wired Headphones 2.110000e+12 Accessories 538 Adams St, San Francisco, CA 94016 2 11.99 5.9950 23.98 11.9900
3 1/5/2019 20:33 141237 27in FHD Monitor 3.070000e+12 NaN 738 10th St, Los Angeles, CA 90001 1 149.99 97.4935 149.99 52.4965
4 1/25/2019 11:59 141238 Wired Headphones 9.690000e+12 Accessories 387 10th St, Austin, TX 73301 1 11.99 5.9950 11.99 5.9950
... ... ... ... ... ... ... ... ... ... ... ...
185961 12/11/2019 20:58 319666 Lightning Charging Cable 6.550000e+12 Accessories 14 Madison St, San Francisco, CA 94016 1 14.95 7.4750 14.95 7.4750
185962 12/1/2019 12:01 319667 AA Batteries (4-pack) 5.350000e+12 Accessories 549 Willow St, Los Angeles, CA 90001 2 3.84 1.9200 7.68 3.8400
185963 12/9/2019 6:43 319668 Vareebadd Phone 2.670000e+12 Accessories 273 Wilson St, Seattle, WA 98101 1 400.00 132.0000 400.00 268.0000
185964 12/3/2019 10:39 319669 Wired Headphones 5.220000e+12 Accessories 778 River St, Dallas, TX 75001 1 11.99 5.9950 11.99 5.9950
185965 12/21/2019 21:45 319670 Bose SoundSport Headphones 8.080000e+12 Accessories 747 Chestnut St, Los Angeles, CA 90001 1 99.99 49.9950 99.99 49.9950

185966 rows × 11 columns

Data Understanding¶

In [3]:
d.sample(10)
Out[3]:
Order Date Order ID Product Product_ean Category Purchase Address Quantity Ordered Price Each Cost price turnover margin
135276 10/26/2019 14:39 271020 Bose SoundSport Headphones 1.980000e+12 Accessories 958 1st St, Boston, MA 02215 1 99.99 49.9950 99.99 49.9950
120628 9/21/2019 8:56 256960 AA Batteries (4-pack) 6.190000e+12 Accessories 453 Center St, San Francisco, CA 94016 1 3.84 1.9200 3.84 1.9200
166348 12/16/2019 11:26 300803 Lightning Charging Cable 4.640000e+12 Accessories 555 Church St, San Francisco, CA 94016 1 14.95 7.4750 14.95 7.4750
177118 12/2/2019 13:18 311171 Bose SoundSport Headphones 7.720000e+12 Accessories 56 Washington St, Los Angeles, CA 90001 1 99.99 49.9950 99.99 49.9950
62277 5/28/2019 3:01 200928 AA Batteries (4-pack) 5.380000e+12 Accessories 622 South St, Boston, MA 02215 1 3.84 1.9200 3.84 1.9200
25080 3/3/2019 17:22 165260 34in Ultrawide Monitor 6.400000e+12 Appliances 716 Washington St, San Francisco, CA 94016 1 379.99 125.3967 379.99 254.5933
118766 9/27/2019 11:24 255152 AA Batteries (4-pack) 4.330000e+12 Accessories 235 Wilson St, Austin, TX 73301 1 3.84 1.9200 3.84 1.9200
121423 9/21/2019 12:31 257728 27in 4K Gaming Monitor 8.180000e+12 Appliances 733 Walnut St, New York City, NY 10001 1 389.99 128.6967 389.99 261.2933
126973 10/20/2019 14:21 263054 Bose SoundSport Headphones 2.030000e+12 Accessories 20 Ridge St, Seattle, WA 98101 1 99.99 49.9950 99.99 49.9950
18766 2/16/2019 19:40 159208 Bose SoundSport Headphones 3.410000e+12 Accessories 688 Washington St, New York City, NY 10001 1 99.99 49.9950 99.99 49.9950
In [4]:
d.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 185966 entries, 0 to 185965
Data columns (total 11 columns):
 #   Column            Non-Null Count   Dtype  
---  ------            --------------   -----  
 0   Order Date        185966 non-null  object 
 1   Order ID          185966 non-null  int64  
 2   Product           185966 non-null  object 
 3   Product_ean       185966 non-null  float64
 4   Category          185366 non-null  object 
 5   Purchase Address  185966 non-null  object 
 6   Quantity Ordered  185966 non-null  int64  
 7   Price Each        185734 non-null  float64
 8   Cost price        185966 non-null  float64
 9   turnover          185966 non-null  float64
 10  margin            185966 non-null  float64
dtypes: float64(5), int64(2), object(4)
memory usage: 15.6+ MB
In [5]:
d.describe()
Out[5]:
Order ID Product_ean Quantity Ordered Price Each Cost price turnover margin
count 185966.000000 1.859660e+05 185966.000000 185734.000000 185966.000000 185966.000000 185966.000000
mean 230414.910914 5.509176e+12 1.124388 184.040854 69.670555 185.496050 115.292602
std 51513.524171 2.598419e+12 0.442801 332.712540 109.427028 332.928676 225.233281
min 141234.000000 1.000000e+12 1.000000 2.990000 1.495000 2.990000 1.495000
25% 185828.250000 3.250000e+12 1.000000 11.950000 5.975000 11.950000 5.975000
50% 230368.500000 5.510000e+12 1.000000 14.950000 7.475000 14.950000 7.475000
75% 275032.000000 7.770000e+12 1.000000 150.000000 97.500000 150.000000 52.500000
max 319670.000000 1.000000e+13 9.000000 1700.000000 561.000000 3400.000000 2278.000000

Data Preprocessing¶

Nulls using missingno

In [6]:
InteractiveShell.ast_node_interactivity = "all"
msno.bar(d)
Out[6]:
<Axes: >
In [7]:
msno.matrix(df.sort_values(by='Order ID'))
Out[7]:
<Axes: >
In [8]:
msno.heatmap(d,cmap='GnBu')
Out[8]:
<Axes: >

Duplicated values

In [9]:
d[d.duplicated(keep=False)]
Out[9]:
Order Date Order ID Product Product_ean Category Purchase Address Quantity Ordered Price Each Cost price turnover margin
6 1/26/2019 12:16 141240 27in 4K Gaming Monitor 5.170000e+12 Appliances 979 Park St, Los Angeles, CA 90001 1 389.99 128.6967 389.99 261.2933
7 1/5/2019 12:04 141241 USB-C Charging Cable 8.050000e+12 NaN 181 6th St, San Francisco, CA 94016 1 11.95 5.9750 11.95 5.9750
8 1/1/2019 10:30 141242 Bose SoundSport Headphones 1.510000e+12 Accessories 867 Willow St, Los Angeles, CA 90001 1 99.99 49.9950 99.99 49.9950
9 1/22/2019 21:20 141243 Apple Airpods Headphones 1.390000e+12 Accessories 657 Johnson St, San Francisco, CA 94016 1 150.00 97.5000 150.00 52.5000
10 1/7/2019 11:29 141244 Apple Airpods Headphones 4.330000e+12 Accessories 492 Walnut St, San Francisco, CA 94016 1 150.00 97.5000 150.00 52.5000
11 1/31/2019 10:12 141245 Macbook Pro Laptop 1.170000e+12 electronics 322 6th St, San Francisco, CA 94016 1 1700.00 561.0000 1700.00 1139.0000
12 1/9/2019 18:57 141246 AAA Batteries (4-pack) 4.440000e+12 Accessories 618 7th St, Los Angeles, CA 90001 3 2.99 1.4950 8.97 4.4850
13 1/26/2019 12:16 141240 27in 4K Gaming Monitor 5.170000e+12 Appliances 979 Park St, Los Angeles, CA 90001 1 389.99 128.6967 389.99 261.2933
14 1/5/2019 12:04 141241 USB-C Charging Cable 8.050000e+12 NaN 181 6th St, San Francisco, CA 94016 1 11.95 5.9750 11.95 5.9750
15 1/1/2019 10:30 141242 Bose SoundSport Headphones 1.510000e+12 Accessories 867 Willow St, Los Angeles, CA 90001 1 99.99 49.9950 99.99 49.9950
16 1/22/2019 21:20 141243 Apple Airpods Headphones 1.390000e+12 Accessories 657 Johnson St, San Francisco, CA 94016 1 150.00 97.5000 150.00 52.5000
17 1/7/2019 11:29 141244 Apple Airpods Headphones 4.330000e+12 Accessories 492 Walnut St, San Francisco, CA 94016 1 150.00 97.5000 150.00 52.5000
18 1/31/2019 10:12 141245 Macbook Pro Laptop 1.170000e+12 electronics 322 6th St, San Francisco, CA 94016 1 1700.00 561.0000 1700.00 1139.0000
19 1/9/2019 18:57 141246 AAA Batteries (4-pack) 4.440000e+12 Accessories 618 7th St, Los Angeles, CA 90001 3 2.99 1.4950 8.97 4.4850
90539 7/6/2019 16:53 228015 AA Batteries (4-pack) 8.850000e+12 Accessories 162 Ridge St, Boston, MA 02215 1 3.84 1.9200 3.84 1.9200
90540 7/6/2019 16:53 228015 AA Batteries (4-pack) 8.850000e+12 Accessories 162 Ridge St, Boston, MA 02215 1 3.84 1.9200 3.84 1.9200
108023 8/13/2019 20:13 244840 iPhone 7.390000e+12 Phones 817 Cedar St, Dallas, TX 75001 1 700.00 231.0000 700.00 469.0000
108024 8/11/2019 9:42 244841 AA Batteries (4-pack) 4.440000e+12 Accessories 130 Jefferson St, San Francisco, CA 94016 2 3.84 1.9200 7.68 3.8400
108025 8/16/2019 11:12 244842 USB-C Charging Cable 4.140000e+12 Accessories 364 Cedar St, Atlanta, GA 30301 1 11.95 5.9750 11.95 5.9750
108026 8/7/2019 15:55 244843 Wired Headphones 6.420000e+12 Accessories 895 Hickory St, New York City, NY 10001 1 11.99 5.9950 11.99 5.9950
108027 8/1/2019 16:25 244844 Lightning Charging Cable 9.940000e+12 Accessories 29 Center St, Boston, MA 02215 1 14.95 7.4750 14.95 7.4750
108028 8/31/2019 12:48 244845 Apple Airpods Headphones 4.740000e+12 Accessories 11 Park St, Atlanta, GA 30301 1 150.00 97.5000 150.00 52.5000
108029 8/13/2019 17:52 244846 Bose SoundSport Headphones 9.760000e+12 Accessories 31 Spruce St, New York City, NY 10001 1 99.99 49.9950 99.99 49.9950
108030 8/12/2019 19:56 244847 Vareebadd Phone 2.700000e+12 Accessories 183 Willow St, Los Angeles, CA 90001 1 400.00 132.0000 400.00 268.0000
108031 8/1/2019 20:29 244848 Lightning Charging Cable 6.240000e+12 Accessories 360 Johnson St, Boston, MA 02215 1 14.95 7.4750 14.95 7.4750
108032 8/13/2019 20:13 244840 iPhone 7.390000e+12 Phones 817 Cedar St, Dallas, TX 75001 1 700.00 231.0000 700.00 469.0000
108033 8/11/2019 9:42 244841 AA Batteries (4-pack) 4.440000e+12 Accessories 130 Jefferson St, San Francisco, CA 94016 2 3.84 1.9200 7.68 3.8400
108034 8/16/2019 11:12 244842 USB-C Charging Cable 4.140000e+12 Accessories 364 Cedar St, Atlanta, GA 30301 1 11.95 5.9750 11.95 5.9750
108035 8/7/2019 15:55 244843 Wired Headphones 6.420000e+12 Accessories 895 Hickory St, New York City, NY 10001 1 11.99 5.9950 11.99 5.9950
108036 8/1/2019 16:25 244844 Lightning Charging Cable 9.940000e+12 Accessories 29 Center St, Boston, MA 02215 1 14.95 7.4750 14.95 7.4750
108037 8/31/2019 12:48 244845 Apple Airpods Headphones 4.740000e+12 Accessories 11 Park St, Atlanta, GA 30301 1 150.00 97.5000 150.00 52.5000
108038 8/13/2019 17:52 244846 Bose SoundSport Headphones 9.760000e+12 Accessories 31 Spruce St, New York City, NY 10001 1 99.99 49.9950 99.99 49.9950
108039 8/12/2019 19:56 244847 Vareebadd Phone 2.700000e+12 Accessories 183 Willow St, Los Angeles, CA 90001 1 400.00 132.0000 400.00 268.0000
108040 8/1/2019 20:29 244848 Lightning Charging Cable 6.240000e+12 Accessories 360 Johnson St, Boston, MA 02215 1 14.95 7.4750 14.95 7.4750
In [10]:
d.drop_duplicates(inplace=True)
d[d.duplicated(keep=False)]
Out[10]:
Order Date Order ID Product Product_ean Category Purchase Address Quantity Ordered Price Each Cost price turnover margin

Converting types

In [55]:
d['Product']=d['Product'].astype('string')
d['Order Date']=pd.to_datetime(d['Order Date'])
d['Purchase Address']=d['Purchase Address'].astype('string')
d.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 185949 entries, 0 to 185965
Data columns (total 11 columns):
 #   Column            Non-Null Count   Dtype         
---  ------            --------------   -----         
 0   Order Date        185949 non-null  datetime64[ns]
 1   Order ID          185949 non-null  int64         
 2   Product           185949 non-null  string        
 3   Product_ean       185949 non-null  float64       
 4   Category          185949 non-null  object        
 5   Purchase Address  185949 non-null  string        
 6   Quantity Ordered  185949 non-null  int64         
 7   Price Each        185949 non-null  float64       
 8   Cost price        185949 non-null  float64       
 9   turnover          185949 non-null  float64       
 10  margin            185949 non-null  float64       
dtypes: datetime64[ns](1), float64(5), int64(2), object(1), string(2)
memory usage: 21.1+ MB

Detecting outliers in each column using boxplot in plotly

In [12]:
# for i in d.columns:
#     fig = px.box(d, y=i)
#     fig.write_html(f'outliers{i}.html')
#     fig.show()

fixing missing values

In [13]:
d.isnull().sum()
Out[13]:
Order Date            0
Order ID              0
Product               0
Product_ean           0
Category            599
Purchase Address      0
Quantity Ordered      0
Price Each          232
Cost price            0
turnover              0
margin                0
dtype: int64
In [14]:
d['Category'].fillna('Accessories',inplace=True)
d['Price Each'].fillna(d['Price Each'].median(),inplace=True)
d.isnull().sum()
Out[14]:
Order Date          0
Order ID            0
Product             0
Product_ean         0
Category            0
Purchase Address    0
Quantity Ordered    0
Price Each          0
Cost price          0
turnover            0
margin              0
dtype: int64

EDA & Visualization¶

In [15]:
d.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 185949 entries, 0 to 185965
Data columns (total 11 columns):
 #   Column            Non-Null Count   Dtype         
---  ------            --------------   -----         
 0   Order Date        185949 non-null  datetime64[ns]
 1   Order ID          185949 non-null  int64         
 2   Product           185949 non-null  string        
 3   Product_ean       185949 non-null  float64       
 4   Category          185949 non-null  object        
 5   Purchase Address  185949 non-null  object        
 6   Quantity Ordered  185949 non-null  int64         
 7   Price Each        185949 non-null  float64       
 8   Cost price        185949 non-null  float64       
 9   turnover          185949 non-null  float64       
 10  margin            185949 non-null  float64       
dtypes: datetime64[ns](1), float64(5), int64(2), object(2), string(1)
memory usage: 17.0+ MB
In [16]:
d.describe()
Out[16]:
Order ID Product_ean Quantity Ordered Price Each Cost price turnover margin
count 185949.000000 1.859490e+05 185949.000000 185949.000000 185949.000000 185949.000000 185949.000000
mean 230417.582299 5.509192e+12 1.124384 183.825650 69.668947 185.491894 115.290031
std 51512.875322 2.598407e+12 0.442794 332.550136 109.424372 332.920400 225.227642
min 141234.000000 1.000000e+12 1.000000 2.990000 1.495000 2.990000 1.495000
25% 185831.000000 3.250000e+12 1.000000 11.950000 5.975000 11.950000 5.975000
50% 230368.000000 5.510000e+12 1.000000 14.950000 7.475000 14.950000 7.475000
75% 275036.000000 7.770000e+12 1.000000 150.000000 97.500000 150.000000 52.500000
max 319670.000000 1.000000e+13 9.000000 1700.000000 561.000000 3400.000000 2278.000000

Count categories

In [17]:
fig = px.scatter(d,width =600,height = 600, x="Quantity Ordered", y="Cost price", color="Category" )
fig.show()
In [18]:
plt.figure(figsize=(10, 6)) 
sns.kdeplot(data=d, x='Cost price', hue='Category', fill=True) 
plt.xlabel('Cost price') 
plt.ylabel('Density') 
plt.title('Price Distribution by Category') 
plt.show()
Out[18]:
<Figure size 1000x600 with 0 Axes>
Out[18]:
<Axes: xlabel='Cost price', ylabel='Density'>
Out[18]:
Text(0.5, 0, 'Cost price')
Out[18]:
Text(0, 0.5, 'Density')
Out[18]:
Text(0.5, 1.0, 'Price Distribution by Category')
In [19]:
sns.pairplot(d[['Cost price', 'Quantity Ordered', 'Category']],hue='Category' , diag_kind='kde') 
# plt.title('Pairwise Scatter Plots - Cost price, Quantity, and Product') 
# plt.suptitle('Pairwise Scatter Plots - Sales, Quantity, and Age',y=1)
plt.show()
Out[19]:
<seaborn.axisgrid.PairGrid at 0x16df1a07d50>

Questions¶

a) what is the relantion between Category and Cost price?

In [20]:
plt.figure(figsize=(8, 6)) 
sns.boxplot(y='Cost price', x='Category', data=d) 
plt.xlabel(' Category') 
plt.ylabel('Cost Sales') 
plt.show()
Out[20]:
<Figure size 800x600 with 0 Axes>
Out[20]:
<Axes: xlabel='Category', ylabel='Cost price'>
Out[20]:
Text(0.5, 0, ' Category')
Out[20]:
Text(0, 0.5, 'Cost Sales')

b) what is the relation between Cost price , Category and Quantity?

In [21]:
plt.figure(figsize=(10, 6)) 
marital_type_sales = df.pivot_table(index='Quantity Ordered', columns='Category', values='Cost price', aggfunc='sum') 
marital_type_sales.plot(kind='bar', stacked=True) 
plt.xlabel('Quantity Ordered') 
plt.ylabel('Cost price') 
plt.legend(title='Category', bbox_to_anchor=(1, 1)) 
plt.show()
Out[21]:
<Figure size 1000x600 with 0 Axes>
Out[21]:
<Axes: xlabel='Quantity Ordered'>
Out[21]:
Text(0.5, 0, 'Quantity Ordered')
Out[21]:
Text(0, 0.5, 'Cost price')
Out[21]:
<matplotlib.legend.Legend at 0x16d86c51cd0>
<Figure size 1000x600 with 0 Axes>

c) what is the correlation between features?

In [22]:
correlation_matrix = d.corr(numeric_only=True) 
sns.heatmap(correlation_matrix, annot=True,
             cmap='coolwarm'
             ) 
plt.title('Correlation Matrix') 
plt.show()
Out[22]:
<Axes: >
Out[22]:
Text(0.5, 1.0, 'Correlation Matrix')

d) what is the ratio between categories?

In [23]:
ratio= d.groupby('Category')['Category'].count()

plt.pie(ratio, labels=ratio.index,
             autopct='%1.1f%%',wedgeprops=dict(width=0.5)
               ) 
plt.title('Categories percentage') 
plt.axis('equal') # Equal aspect ratio ensures that pie is drawn as a circle. 
plt.show()
Out[23]:
([<matplotlib.patches.Wedge at 0x16d85c50150>,
  <matplotlib.patches.Wedge at 0x16d86d37350>,
  <matplotlib.patches.Wedge at 0x16d86d48e50>,
  <matplotlib.patches.Wedge at 0x16d86d4ac90>],
 [Text(-0.7135583623522399, 0.8371585653370511, 'Accessories'),
  Text(0.3756370504333411, -1.0338746569781754, 'Appliances'),
  Text(0.961878432899624, -0.5336570812071771, 'Phones'),
  Text(1.087785030634161, -0.1634739340942089, 'electronics')],
 [Text(-0.38921365219213083, 0.4566319447293006, '72.5%'),
  Text(0.2048929366000042, -0.5639316310790047, '16.2%'),
  Text(0.5246609633997948, -0.29108568065846024, '6.6%'),
  Text(0.593337289436815, -0.08916760041502303, '4.7%')])
Out[23]:
Text(0.5, 1.0, 'Categories percentage')
Out[23]:
(-1.0999956194338132,
 1.0999997914016102,
 -1.0999985133508985,
 1.0999982307209664)

e) what is the most products sold?

In [24]:
ratio= d.groupby('Product')['Quantity Ordered'].sum()
fig, ax = plt.subplots()
plt.bar(ratio.index, ratio.values) 
plt.xticks(rotation=90)  
plt.xlabel('Sales')  
plt.ylabel('Amount') 
plt.show() 
Out[24]:
<BarContainer object of 19 artists>
Out[24]:
([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18],
 [Text(0, 0, '20in Monitor'),
  Text(1, 0, '27in 4K Gaming Monitor'),
  Text(2, 0, '27in FHD Monitor'),
  Text(3, 0, '34in Ultrawide Monitor'),
  Text(4, 0, 'AA Batteries (4-pack)'),
  Text(5, 0, 'AAA Batteries (4-pack)'),
  Text(6, 0, 'Apple Airpods Headphones'),
  Text(7, 0, 'Bose SoundSport Headphones'),
  Text(8, 0, 'Flatscreen TV'),
  Text(9, 0, 'Google Phone'),
  Text(10, 0, 'LG Dryer'),
  Text(11, 0, 'LG Washing Machine'),
  Text(12, 0, 'Lightning Charging Cable'),
  Text(13, 0, 'Macbook Pro Laptop'),
  Text(14, 0, 'ThinkPad Laptop'),
  Text(15, 0, 'USB-C Charging Cable'),
  Text(16, 0, 'Vareebadd Phone'),
  Text(17, 0, 'Wired Headphones'),
  Text(18, 0, 'iPhone')])
Out[24]:
Text(0.5, 0, 'Sales')
Out[24]:
Text(0, 0.5, 'Amount')

f) what is the number of unique values in Price Each?

In [25]:
np.count_nonzero(d['Price Each'].unique())
Out[25]:
17

g) what happend to cost price over time ?

In [26]:
fig = px.line(d, x='Order Date', y='Cost price', color='Product',markers=True)
fig.show()

h) how weekday sales is going?

In [27]:
weekday_sales = d['Order Date'].dt.day_name()

weekday_sales.value_counts().plot(kind='bar',
                                  figsize=(10, 6)) 
plt.xlabel('Weekday') 
plt.ylabel('Sales Count') 
plt.title('Sales by Weekday') 
plt.show()
Out[27]:
<Axes: >
Out[27]:
Text(0.5, 0, 'Weekday')
Out[27]:
Text(0, 0.5, 'Sales Count')
Out[27]:
Text(0.5, 1.0, 'Sales by Weekday')

i) how will be the relation & the best fit line between Price Each & Cost price?

In [31]:
fig = px.scatter(d,width =600,height = 600, x="Cost price", y="Price Each",
                 trendline="ols")
fig.show()

j)how will be Cost price Trend?

In [33]:
pt= d.pivot_table(index='Order Date',
                                  columns='Category',
                                    values='Cost price', aggfunc='sum') 
pt.resample('M').sum().plot(kind='area', figsize=(10, 6)) 

plt.xlabel('Date') 
plt.ylabel('Cost price') 
plt.title('Cost price Trend by Category') 
plt.show()
Out[33]:
<Axes: xlabel='Order Date'>
Out[33]:
Text(0.5, 0, 'Date')
Out[33]:
Text(0, 0.5, 'Cost price')
Out[33]:
Text(0.5, 1.0, 'Cost price Trend by Category')

ADDING NEW FEATURES¶

In [71]:
c=d['Purchase Address'].tolist()
x=[]
for i in c:
    q=re.split(",",i)
    x.append(q[1])
d['city']=pd.Series(x)
d
Out[71]:
Order Date Order ID Product Product_ean Category Purchase Address Quantity Ordered Price Each Cost price turnover margin city
0 2019-01-22 21:25:00 141234 iPhone 5.640000e+12 Phones 944 Walnut St, Boston, MA 02215 1 700.00 231.0000 700.00 469.0000 Boston
1 2019-01-28 14:15:00 141235 Lightning Charging Cable 5.560000e+12 Accessories 185 Maple St, Portland, OR 97035 1 14.95 7.4750 14.95 7.4750 Portland
2 2019-01-17 13:33:00 141236 Wired Headphones 2.110000e+12 Accessories 538 Adams St, San Francisco, CA 94016 2 11.99 5.9950 23.98 11.9900 San Francisco
3 2019-01-05 20:33:00 141237 27in FHD Monitor 3.070000e+12 Accessories 738 10th St, Los Angeles, CA 90001 1 149.99 97.4935 149.99 52.4965 Los Angeles
4 2019-01-25 11:59:00 141238 Wired Headphones 9.690000e+12 Accessories 387 10th St, Austin, TX 73301 1 11.99 5.9950 11.99 5.9950 Austin
... ... ... ... ... ... ... ... ... ... ... ... ...
185961 2019-12-11 20:58:00 319666 Lightning Charging Cable 6.550000e+12 Accessories 14 Madison St, San Francisco, CA 94016 1 14.95 7.4750 14.95 7.4750 NaN
185962 2019-12-01 12:01:00 319667 AA Batteries (4-pack) 5.350000e+12 Accessories 549 Willow St, Los Angeles, CA 90001 2 3.84 1.9200 7.68 3.8400 NaN
185963 2019-12-09 06:43:00 319668 Vareebadd Phone 2.670000e+12 Accessories 273 Wilson St, Seattle, WA 98101 1 400.00 132.0000 400.00 268.0000 NaN
185964 2019-12-03 10:39:00 319669 Wired Headphones 5.220000e+12 Accessories 778 River St, Dallas, TX 75001 1 11.99 5.9950 11.99 5.9950 NaN
185965 2019-12-21 21:45:00 319670 Bose SoundSport Headphones 8.080000e+12 Accessories 747 Chestnut St, Los Angeles, CA 90001 1 99.99 49.9950 99.99 49.9950 NaN

185949 rows × 12 columns